This notebook performs differential expression analysis of human endogenous retroviruses (L1s) across cell-type clusters and brain regions from ASAP PMDBS snRNA-seq data.
Specifically, it: * Defines helper functions for extracting significant genes and visualizing expression (custom getSignName, getAverage, meanPlot_cus, and box_pseudobulk). * Loads TE (L1s) count data generated from trusTEr outputs and organizes them by region and cluster. * Builds DESeq2 models to compare PD vs. control expression across multiple clusters. * Produces summary tables of DE results, mean expression plots, and box plots for selected example L1s. * Identifies overlaps of upregulated L1s across cell types and regions using UpSet plots. * Compares in vivo upregulated L1s in microglia with in vitro stimulation experiments.
Loading data and needed libraries
library(UpSetR)
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:SummarizedExperiment':
##
## shift
## The following object is masked from 'package:GenomicRanges':
##
## shift
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
library(grid)
library(ggpubr)
## Loading required package: ggplot2
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first(), S4Vectors::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks data.table::second(), S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice() masks IRanges::slice()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openxlsx)
load("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/processing/preprocessed_TE_data.RData")
source("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/processing/TE_DEA_functions.R")
source("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/processing/TE_heatmap_functions.R")
# Load samplesheet
samplesheet <- read.xlsx("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/ASAP_samplesheet.xlsx")
# I want to split TE_data$te_counts per region, but first I need to categorize the columns by region
clusters <- colnames(TE_data$te_counts)[7:ncol(TE_data$te_counts)]
clusters_per_region <- data.frame(cluster = clusters,
region = str_extract(clusters, "SN|PUT|PFC|AMY")) %>%
group_by(region) %>%
summarise(clusters = list(cluster)) %>%
ungroup()
expressed_L1s_bulkrna <- fread("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_bulkRNAseq/results/tables/expressed_FL_L1HS_L1PA3_expressed.tab", data.table = F, header = F)$V1
# Now we loop through the regions and select only the region's columns
te_counts <- list()
for (i in 1:nrow(clusters_per_region)) {
region <- clusters_per_region$region[i]
cluster_cols <- clusters_per_region$clusters[[i]] # these are vectors of column names
# Subset counts dataframe by these columns and keep Geneid
te_counts[[region]] <- TE_data$te_counts %>%
select(Geneid, all_of(cluster_cols)) %>%
filter(Geneid %in% expressed_L1s_bulkrna) # Expressed in bulk RNA
}
head(te_counts$SN)
te_dds_list <- list()
te_exp_list <- list()
te_res_list <- list()
te_res_list_df <- list()
regions <- c("SN", "PUT", "PFC", "AMY")
for(cluster in as.character(0:7)){
for(region in regions){
print(cluster)
print(region)
samplesheet_list <- split(samplesheet, f = samplesheet$Region)
rownames(samplesheet_list[[region]]) <- samplesheet_list[[region]]$Sample
samples_cluster <- paste(rownames(samplesheet_list[[region]]), cluster, sep="_")
samples_cluster <- samples_cluster[which(samples_cluster %in% colnames(te_counts[[region]]))]
rownames(samplesheet_list[[region]]) <- paste(rownames(samplesheet_list[[region]]), cluster, sep="_")
# Expressed in snRNA
te_counts_expressed <- te_counts[[region]][which(rowSums(te_counts[[region]][,samples_cluster]) > 0),]
print(paste0("Cluster ", cluster, ", region ", region, " expresses ", nrow(te_counts_expressed)))
region_cluster <- paste0(region, "_", cluster)
te_dds_list[[region_cluster]] <- DESeqDataSetFromMatrix((te_counts_expressed[,samples_cluster]+1), samplesheet_list[[region]][samples_cluster,], design = ~ Dx)
te_dds_list[[region_cluster]]$Dx <- relevel(te_dds_list[[region_cluster]]$Dx, "Ctl")
error_handle <- tryCatch({
te_dds_list[[region_cluster]] <- DESeq(te_dds_list[[region_cluster]])
}, error = function(e) {
print(paste("Something happened with ", region, cluster))
return(1)
})
if(is.numeric(error_handle)){
if(error_handle == 1){
te_dds_list[[region_cluster]] <- NA
te_res_list[[region_cluster]] <- NA
te_res_list_df[[region_cluster]] <- NA
te_exp_list[[region_cluster]] <- NA
}
}else{
te_exp_list[[region_cluster]] <- getAverage(te_dds_list[[region_cluster]])
te_res_list[[region_cluster]] <- lfcShrink(te_dds_list[[region_cluster]], "Dx_PD_vs_Ctl")
te_res_list_df[[region_cluster]] <- as.data.frame(te_res_list[[region_cluster]])
te_res_list_df[[region_cluster]]$L1s_id <- rownames(te_res_list_df[[region_cluster]])
}
}
}
## [1] "0"
## [1] "SN"
## [1] "Cluster 0, region SN expresses 98"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "0"
## [1] "PUT"
## [1] "Cluster 0, region PUT expresses 518"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 25 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "0"
## [1] "PFC"
## [1] "Cluster 0, region PFC expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 11 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "0"
## [1] "AMY"
## [1] "Cluster 0, region AMY expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 16 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "1"
## [1] "SN"
## [1] "Cluster 1, region SN expresses 458"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 26 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## [1] "1"
## [1] "PUT"
## [1] "Cluster 1, region PUT expresses 515"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 25 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "1"
## [1] "PFC"
## [1] "Cluster 1, region PFC expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 10 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## [1] "1"
## [1] "AMY"
## [1] "Cluster 1, region AMY expresses 518"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 11 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "2"
## [1] "SN"
## [1] "Cluster 2, region SN expresses 517"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 21 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "2"
## [1] "PUT"
## [1] "Cluster 2, region PUT expresses 519"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 15 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "2"
## [1] "PFC"
## [1] "Cluster 2, region PFC expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 14 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "2"
## [1] "AMY"
## [1] "Cluster 2, region AMY expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 17 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "3"
## [1] "SN"
## [1] "Cluster 3, region SN expresses 519"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 23 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "3"
## [1] "PUT"
## [1] "Cluster 3, region PUT expresses 514"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 24 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "3"
## [1] "PFC"
## [1] "Cluster 3, region PFC expresses 513"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 10 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "3"
## [1] "AMY"
## [1] "Cluster 3, region AMY expresses 519"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 28 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "4"
## [1] "SN"
## [1] "Cluster 4, region SN expresses 487"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 14 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "4"
## [1] "PUT"
## [1] "Cluster 4, region PUT expresses 476"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 20 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "4"
## [1] "PFC"
## [1] "Cluster 4, region PFC expresses 442"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 6 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "4"
## [1] "AMY"
## [1] "Cluster 4, region AMY expresses 490"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 9 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "5"
## [1] "SN"
## [1] "Cluster 5, region SN expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 18 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "5"
## [1] "PUT"
## [1] "Cluster 5, region PUT expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 15 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "5"
## [1] "PFC"
## [1] "Cluster 5, region PFC expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 23 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "5"
## [1] "AMY"
## [1] "Cluster 5, region AMY expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 36 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## [1] "6"
## [1] "SN"
## [1] "Cluster 6, region SN expresses 324"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 15 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "6"
## [1] "PUT"
## [1] "Cluster 6, region PUT expresses 226"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 5 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "6"
## [1] "PFC"
## [1] "Cluster 6, region PFC expresses 338"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 16 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "6"
## [1] "AMY"
## [1] "Cluster 6, region AMY expresses 263"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 13 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "7"
## [1] "SN"
## [1] "Cluster 7, region SN expresses 212"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 3 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "7"
## [1] "PUT"
## [1] "Cluster 7, region PUT expresses 91"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 5 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "7"
## [1] "PFC"
## [1] "Cluster 7, region PFC expresses 62"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## Warning in lfproc(x, y, weights = weights, cens = cens, base = base, geth =
## geth, : Estimated rdf < 1.0; not estimating variance
## final dispersion estimates
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "7"
## [1] "AMY"
## [1] "Cluster 7, region AMY expresses 102"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 5 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
openxlsx::write.xlsx(te_res_list_df, paste("/Volumes/MyPassport/ASAP/data/ASAP_PMDBS_snRNAseq/results/tables/L1s_PDvsCtl_res.xlsx", sep=""))
meanPlot_cus())mean_plots_list <- list()
te_res_expressed_list_df <- list()
for(region_cluster in names(te_exp_list)){
if(!all(is.na(te_res_list[[region_cluster]]))){
region <- sapply(str_split(region_cluster, "_"), `[[`, 1)
effect <- unique(samplesheet_list[[region]][which(samplesheet_list[[region]]$Dx != "Ctl"),"Dx"])
colors_list <- pick_colors_meanplot(te_res_list[[region_cluster]])
mean_plots_list[[region_cluster]] <- meanPlot_cus(exp = te_exp_list[[region_cluster]]$Mean,
test = te_res_list[[region_cluster]],
col1=colors_list$col1, col2=colors_list$col2, col3=colors_list$col3,
c1 = effect, c2="Ctl", p = 0.05, l=1, ttl = region_cluster, repel = F)
}
}
# Neurons (cluster 0):
# Very little number of nuclei in SN in this cluster - ignored
mean_plots_list$PUT_0
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PFC_0
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$AMY_0
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Neurons (cluster 1)
mean_plots_list$PUT_1
## Warning: Removed 515 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PFC_1
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$AMY_1
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Astrocytes
mean_plots_list$SN_2
## Warning: Removed 517 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PUT_2
## Warning: Removed 519 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PFC_2
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$AMY_2
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).
# OPCs
mean_plots_list$SN_3
## Warning: Removed 519 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PUT_3
## Warning: Removed 514 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PFC_3
## Warning: Removed 513 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$AMY_3
## Warning: Removed 519 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Microglia
mean_plots_list$SN_4
## Warning: Removed 487 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PUT_4
## Warning: Removed 476 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PFC_4
## Warning: Removed 442 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$AMY_4
## Warning: Removed 490 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Oligodendrocytes
mean_plots_list$SN_5
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PUT_5
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PFC_5
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$AMY_5
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).
# VLMCs and Tcells skipped as their clusters are too small
L1s_counts_norm <- list()
for(region_cluster in names(te_dds_list)){
L1s_counts_norm[[region_cluster]] <- list()
error_handle <- tryCatch({
L1s_counts_norm[[region_cluster]] <- counts(te_dds_list[[region_cluster]], normalize = T)
}, error = function(e) {
print(paste("Something happened with ", region_cluster))
return(1)
})
}
samplesheet_list[["SN"]]$sample_cluster <- paste(samplesheet_list$SN$Sample, "4", sep="_")
samplesheet_list[["PUT"]]$sample_cluster <- paste(samplesheet_list$PUT$Sample, "4", sep="_")
plot_list_sn <- list()
plot_list_put <- list()
for (L1s in te_res_list_df$SN_4[which(te_res_list_df$SN_4$padj < 0.05 & te_res_list_df$SN_4$log2FoldChange > 0),"TE_id"]){
plot_list_sn[[L1s]] <- box_pseudobulk(df = L1s_counts_norm$SN_4,res = te_res_list_df$SN_4,gene = L1s, log2=F)
plot_list_put[[L1s]] <- box_pseudobulk(df = L1s_counts_norm$PUT_4, te_res_list_df$PUT_4, L1s, log2=F)
}
box_pseudobulk(df = L1s_counts_norm$SN_4, te_res_list_df$SN_4, "L1PA2_dup1272", log2=T)
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
box_pseudobulk(df = L1s_counts_norm$PUT_4, te_res_list_df$PUT_4, "L1PA2_dup1272", log2=T)
box_pseudobulk(df = L1s_counts_norm$PFC_4, te_res_list_df$PFC_4, "L1PA2_dup1272", log2=T)
box_pseudobulk(df = L1s_counts_norm$AMY_4, te_res_list_df$AMY_4, "L1PA2_dup1272", log2=T)
box_pseudobulk(df = L1s_counts_norm$SN_4, te_res_list_df$SN_4, "L1PA3_dup8310", log2=T)
box_pseudobulk(df = L1s_counts_norm$PUT_4, te_res_list_df$PUT_4, "L1PA3_dup8310", log2=T)
box_pseudobulk(df = L1s_counts_norm$PFC_4, te_res_list_df$PFC_4, "L1PA3_dup8310", log2=T)
box_pseudobulk(df = L1s_counts_norm$AMY_4, te_res_list_df$AMY_4, "L1PA3_dup8310", log2=T)
box_pseudobulk(df = L1s_counts_norm$SN_4, te_res_list_df$SN_4, "L1PA3_dup6434", log2=T)
box_pseudobulk(df = L1s_counts_norm$PUT_4, te_res_list_df$PUT_4, "L1PA3_dup6434", log2=T)
box_pseudobulk(df = L1s_counts_norm$PFC_4, te_res_list_df$PFC_4, "L1PA3_dup6434", log2=T)
box_pseudobulk(df = L1s_counts_norm$AMY_4, te_res_list_df$AMY_4, "L1PA3_dup6434", log2=T)
get_significant_L1s <- function(df){
det <- df[which(df$padj < 0.05 & df$log2FoldChange > 1), "TE_id"]
if (length(det) > 0){
return(det)
}else{
return(NA)
}
}
# Astrocytes
upset(fromList(sapply(list("SN_2" = te_res_list_df$SN_2,
"PUT_2" = te_res_list_df$PUT_2,
"PFC_2" = te_res_list_df$PFC_2,
"AMY_2" = te_res_list_df$AMY_2), get_significant_L1s)))
grid.text("Astrocytes", vjust = -17, hjust= -3)
# Microglia
upset(fromList(sapply(list("SN_4" = te_res_list_df$SN_4,
"PUT_4" = te_res_list_df$PUT_4,
"PFC_4" = te_res_list_df$PFC_4,
"AMY_4" = te_res_list_df$AMY_4), get_significant_L1s)))
grid.text("Microglia", vjust = -17, hjust= -3)
# OPCs
upset(fromList(sapply(list("SN_3" = te_res_list_df$SN_3,
"PUT_3" = te_res_list_df$PUT_3,
"PFC_3" = te_res_list_df$PFC_3,
"AMY_3" = te_res_list_df$AMY_3), get_significant_L1s)))
grid.text("OPCs", vjust = -17, hjust= -3)
# Oligos
upset(fromList(sapply(list("SN_5" = te_res_list_df$SN_5,
"PUT_5" = te_res_list_df$PUT_5,
"PFC_5" = te_res_list_df$PFC_5,
"AMY_5" = te_res_list_df$AMY_5), get_significant_L1s)))
grid.text("Oligodendrocytes", vjust = -17, hjust= 0)
# Neurons cluster 1
upset(fromList(sapply(list("PUT_1" = te_res_list_df$PUT_1,
"PFC_1" = te_res_list_df$PFC_1,
"AMY_1" = te_res_list_df$AMY_1), get_significant_L1s)))
grid.text("Neurons cluster 1", vjust = -17, hjust= -3)
# All in PUT
upset(fromList(list("OPCs" = get_significant_L1s(te_res_list_df$PUT_3),
# "Oligos" = get_significant_L1s(te_res_list_df$PUT_5),
"Microglia" = get_significant_L1s(te_res_list_df$PUT_4),
# "VLMCs" = get_significant_L1s(te_res_list_df$PUT_6),
# "T cells" = get_significant_L1s(te_res_list_df$PUT_7),
# "Exc Neurons" = get_significant_L1s(te_res_list_df$PUT_0),
# "Inh Neurons" = get_significant_L1s(te_res_list_df$PUT_1),
"Astrocytes" = get_significant_L1s(te_res_list_df$PUT_2))), sets = c("OPCs", "Microglia", "Astrocytes"), nintersects = 100, nsets = 100, mainbar.y.max = 30)
grid.text("Putamen", vjust = -17, hjust= 0)
# All in AMY
upset(fromList(list("Exc Neurons" = get_significant_L1s(te_res_list_df$AMY_0),
"Inh Neurons" = get_significant_L1s(te_res_list_df$AMY_1),
"Astrocytes" = get_significant_L1s(te_res_list_df$AMY_2),
# "OPCs" = get_significant_L1s(te_res_list_df$AMY_3),
"Oligos" = get_significant_L1s(te_res_list_df$AMY_5)
# "Microglia" = get_significant_L1s(te_res_list_df$AMY_4),
# "VLMCs" = get_significant_L1s(te_res_list_df$AMY_6),
# "T cells" = get_significant_L1s(te_res_list_df$AMY_7)
)), sets = c("Exc Neurons", "Inh Neurons", "Astrocytes", "Oligos"), nintersects = 100, nsets = 100, mainbar.y.max = 30)
grid.text("Amygdala", vjust = -17, hjust= 0)
upset(fromList(list("SN: OPCs" = get_significant_L1s(te_res_list_df$SN_3),
"SN: Oligos" = get_significant_L1s(te_res_list_df$SN_5),
"SN: Microglia" = get_significant_L1s(te_res_list_df$SN_4),
"SN: VLMCs" = get_significant_L1s(te_res_list_df$SN_6),
"SN: T cells" = get_significant_L1s(te_res_list_df$SN_7),
"SN: Neurons cluster 0" = get_significant_L1s(te_res_list_df$SN_0),
"SN: Neurons cluster 1" = get_significant_L1s(te_res_list_df$SN_1),
"SN: Astrocytes" = get_significant_L1s(te_res_list_df$SN_2),
"PUT: OPCs" = get_significant_L1s(te_res_list_df$PUT_3),
"PUT: Oligos" = get_significant_L1s(te_res_list_df$PUT_5),
"PUT: Microglia" = get_significant_L1s(te_res_list_df$PUT_4),
"PUT: VLMCs" = get_significant_L1s(te_res_list_df$PUT_6),
"PUT: T cells" = get_significant_L1s(te_res_list_df$PUT_7),
"PUT: Neurons cluster 0" = get_significant_L1s(te_res_list_df$PUT_0),
"PUT: Neurons cluster 1" = get_significant_L1s(te_res_list_df$PUT_1),
"PUT: Astrocytes" = get_significant_L1s(te_res_list_df$PUT_2),
"PFC: OPCs" = get_significant_L1s(te_res_list_df$PFC_3),
"PFC: Oligos" = get_significant_L1s(te_res_list_df$PFC_5),
"PFC: Microglia" = get_significant_L1s(te_res_list_df$PFC_4),
"PFC: VLMCs" = get_significant_L1s(te_res_list_df$PFC_6),
"PFC: T cells" = get_significant_L1s(te_res_list_df$PFC_7),
"PFC: Neurons cluster 0" = get_significant_L1s(te_res_list_df$PFC_0),
"PFC: Neurons cluster 1" = get_significant_L1s(te_res_list_df$PFC_1),
"PFC: Astrocytes" = get_significant_L1s(te_res_list_df$PFC_2),
"AMY: OPCs" = get_significant_L1s(te_res_list_df$AMY_3),
"AMY: Oligos" = get_significant_L1s(te_res_list_df$AMY_5),
"AMY: Microglia" = get_significant_L1s(te_res_list_df$AMY_4),
"AMY: VLMCs" = get_significant_L1s(te_res_list_df$AMY_6),
"AMY: T cells" = get_significant_L1s(te_res_list_df$AMY_7),
"AMY: Neurons cluster 0" = get_significant_L1s(te_res_list_df$AMY_0),
"AMY: Neurons cluster 1" = get_significant_L1s(te_res_list_df$AMY_1),
"AMY: Astrocytes" = get_significant_L1s(te_res_list_df$AMY_2))), nintersects = 100, nsets = 100, mainbar.y.max = 30)